Based somewhat on the notebook to accompany the manuscript:
"Automatic generation of microkinetic mechanisms for heterogeneous catalysis" by
C. Franklin Goldsmith, School of Engineering, Brown University, franklin_goldsmith@brown.edu, and
Richard H. West, Department of Chemical Engineering, Northeastern University, r.west@northeastern.edu
As modified to prepare data for the AIChE conference presentation: 304c Incorporation of Linear Scaling Relations into Automatic Mechanism Generation for Heterogeneous Catalysis Richard H. West, Department of Chemical Engineering, Northeastern University, Boston, MA and C. Franklin Goldsmith, Engineering, Brown University, Providence, RI Tuesday, October 31, 2017 https://aiche.confex.com/aiche/2017/meetingapp.cgi/Paper/500172 https://www.slideshare.net/richardhwest/incorporation-of-linear-scaling-relations-into-automatic-mechanism-generation-for-heterogeneous-catalysis
Then further updated to model catalytic combustion of methanol.
First, we print what git commit we were on when we ran this notebook, for both the source code (RMG-Py) and the database.
%%bash
export RMGpy=~/Code/RMG-Py
pwd
git log -n1 --pretty=oneline
cd ../RMG-Py
pwd
git log -n1 --pretty=oneline
cd ../RMG-database
pwd
git log -n1 --pretty=oneline
We start with a base input file to generate a mechanism for CH3OH plus O2. First we print the input file we'll use to generate the model.
%cat base/input.py
Then we try running it. This will take Richard's computer about four minutes and Emily's computer 25 minutes.
%%bash
python ~/Code/RMG-Py/rmg.py base/input.py > /dev/null
tail -n12 base/RMG.log
There are 54 species and 77 reactions (?)
Next we will import some libraries and set things up to start importing and analyzing the simulation results.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib
# The default output Type 3 (Type3) fonts can't be edited in Adobe Illustrator
# but Type 42 (TrueType) fonts can be, making it easier to move labels around
# slightly to improve layout before publication.
matplotlib.rcParams['pdf.fonttype'] = 42
# Seaborn helps make matplotlib graphics nicer
import seaborn as sns
sns.set_style("ticks")
sns.set_context("paper", font_scale=1.5, rc={"lines.linewidth": 2.0})
import os
import re
import pandas as pd
import numpy as np
import shutil
import subprocess
import multiprocessing
def get_last_csv_file(job_directory):
"""
Find the CSV file from the largest model in the provided job directory.
For CSV files named `simulation_1_13.csv` you want 13 to be the highest number.
"""
solver_directory = os.path.join(job_directory,'solver')
csv_files = sorted([f for f in os.listdir(solver_directory) if f.endswith('.csv') ],
key=lambda f: int(f[:-4].split('_')[2]))
return os.path.join(solver_directory, csv_files[-1])
job_directory = 'base'
get_last_csv_file(job_directory)
We will use Pandas to import the csv file
last_csv_file = get_last_csv_file(job_directory)
data = pd.read_csv(last_csv_file)
data
def get_pandas_data(job_directory):
"""
Get the last CSV file from the provided job directory,
import it into a Pandas data frame, and tidy it up a bit.
"""
last_csv_file = get_last_csv_file(job_directory)
data = pd.read_csv(last_csv_file)
# Make the Time into an index and remove it as a column
data.index = data['Time (s)']
del data['Time (s)']
# Remove inerts that RMG added automatically but we're not using
for i in 'Ar He Ne'.split():
del data[i]
# Remove the Volume column
del data['Volume (m^3)']
# Set any amounts that are slightly negative (within the ODE solver's ATOL tolerance), equal to zero
# to allow 'area' plots without warnings or errors.
# Anything more negative than -1e-16 probably indicates a bug in RMG and should not be hidden here ;-)
data[(data<0) & (data>-1e-16)] = 0
return data
def rename_columns(data):
"""
Removes the number (##) from the end of the column names, in place,
unless doing so would make the names collide.
Also renames a few species so the plot labels match the names in the manuscript.
"""
import re
old = data.columns
new = [re.sub('\(\d+\)$','',n) for n in old]
# don't translate names that would no longer be unique
mapping = {k:v for k,v in zip(old,new) if new.count(v)==1}
data.rename(columns=mapping, inplace=True)
# Now change a few species that are named differently in the manuscript
# than in the thermodynamics database used to build the model,
# so that the plot labels match the manuscript.
mapping = {'COX':'COvdwX', 'OCX': 'CO=X', 'C2H3X':'CH3CX', 'C2H3OX':'CH3CXO'}
data.rename(columns=mapping, inplace=True)
data1 = get_pandas_data('base')
rename_columns(data1)
data1.columns
# Test it with some plots
data1[['H2', 'CO', 'O2', 'CH4', 'H2O']].plot.line(
loglog=True, ylim=(1e-3,0.5), xlim=(1e-3,1e3))
data1[['X', 'CH4X', 'OX', 'CX']].plot.line(
loglog=True, ylim=(1e-15,0.5), xlim=(1e-9,1e3))
data1[['H2O']].plot.line(
loglog=True, ylim=(1e-3,0.5), xlim=(1e-3,1e3))
data1[['CH4', 'O2', 'H2', 'CO', 'H2O']].plot.line()
species_names = data1.columns
# just the gas phase species that aren't always zero:
gas_phase = [n for n in species_names if 'X' not in n and (data1[n]>0).any()]
# all the surface species
surface_phase = [n for n in species_names if 'X' in n]
surface_phase.remove('X')
data1[gas_phase].plot.line()
data1[surface_phase].plot.line()
print "Significant species (those that exceed 0.001 mol at some point)"
significant = [n for n in data1.columns if(data1[n]>0.001).any()]
with sns.color_palette("hls", len(significant)):
data1[significant].plot.area(legend='reverse')
lim = 1e-4
surface = [n for n in data1.columns if 'X' in n and n!='X' and (data1[n]>lim).any() ]
print "The {} surface species that exceed {:.1e} mol at some point".format(len(surface), lim)
total_sites = max(data1['X'])
with sns.color_palette('Set3',len(surface)):
(data1[surface]/total_sites).plot.area(legend='reverse')
plt.ylabel('site fraction')
plt.xlim(0,1e-2) ## notice this is much less than 1 seconde
Now we will use that template 'base' input file to create a ton of other input files with different binding energies, then run them all in RMG-Cat, then process the results.
# First, make a series of input files in separate directories
with open(os.path.join('base', 'input.py')) as infile:
input_file = infile.read()
base_directory = 'binding_energies'
def directory(carbon,oxygen):
return os.path.join(base_directory, "c{:.3f}o{:.3f}".format(carbon,oxygen))
def make_input(binding_energies):
"""
Make an input file for the given (carbon,oxygen) tuple (or iterable) of binding energies
and return the name of the directory in which it is saved.
"""
carbon, oxygen = binding_energies
output = input_file
out_dir = directory(carbon, oxygen)
carbon_string = "'C':({:f}, 'eV/molecule')".format(carbon)
output = re.sub("'C':\(.*?, 'eV/molecule'\)", carbon_string, output)
oxygen_string = "'O':({:f}, 'eV/molecule')".format(oxygen)
output = re.sub("'O':\(.*?, 'eV/molecule'\)", oxygen_string, output)
os.path.exists(out_dir) or os.makedirs(out_dir)
out_file = os.path.join(out_dir, 'input.py')
with open(out_file,'w') as outfile:
outfile.write(output)
shutil.copy(os.path.join('base','run.sh'), out_dir)
return out_dir
print make_input((-8,-3.5))
def run_simulation(binding_energies):
"""
Assuming a job file already exists, run it. This one is local.
Takes a tuple of binding energies, (carbon, oxygen)
"""
carbon, oxygen = binding_energies
job_directory = directory(carbon, oxygen)
print job_directory
assert os.path.exists(job_directory)
return subprocess.check_call('./run.sh', cwd=job_directory)
# Revised range
carbon_range = (-7.5, -2)
oxygen_range = (-6.5, -1.5)
grid_size = 9
mesh = np.mgrid[carbon_range[0]:carbon_range[1]:grid_size*1j,
oxygen_range[0]:oxygen_range[1]:grid_size*1j]
with sns.axes_style("whitegrid"):
plt.axis('square')
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
plt.show()
mesh
experiments = mesh.reshape((2,-1)).T
experiments
with sns.axes_style("whitegrid"):
plt.axis('square')
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
plt.plot(*experiments.T, marker='o', linestyle='none')
map(make_input, experiments)
# Now run the simulations using a pool.
# Don't run this cell unless you have a while to wait!! ###
#pool = multiprocessing.Pool()
#result = pool.map(run_simulation, experiments)
# Instead, upload the input files to a cluster and run `start_all.sh`
# The script `stage_all.sh` will stage (add) the results in git
# so you can commit and get them back to analyze with the subsequent cells:
base_directory = 'binding_energies'
# base_directory = 'binding_energies_local'
def get_data(experiment):
carbon, oxygen = experiment
directory(carbon,oxygen)
try:
data = get_pandas_data(directory(carbon,oxygen))
except OSError:
return None
rename_columns(data)
return data
datas = {tuple(e): get_data(e) for e in experiments}
def get_max_co2(experiment):
data = datas[tuple(experiment)]
if data is None:
return np.nan
return data[['CO2']].max()
highest_co2 = max([float(get_max_co2(e)) for e in experiments])
For this first attempt to extract "rate" we fit an exponential growth curve to the normalized CO2 concentration profile of each simulation. First we plot all the curves on one plot, then we'll plot each with its fitted exponential.
import seaborn as sns
plt.figure(figsize=(5, 4))
num_lines = len(experiments)
sns.set_palette(sns.color_palette("coolwarm",num_lines))
ax = plt.axes()
def make_label(experiment):
return "{:+.1f}, {:+.1f}".format(*experiment)
for experiment in experiments:
#print experiment
data = get_data(experiment)
if data is None:
print("No data for {}".format(experiment))
continue
times = np.array(data.index)
normalized = data[['CO2']].values / highest_co2
ax.plot(times, normalized, label=make_label(experiment))
#normalized.plot.line(ax=ax, label=make_label(experiment))
#ax.text(times[-10], normalized[-10], make_label(experiment))
#plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel('Time (s)')
plt.ylabel('Normalized CO$_2$ concentration')
sns.set_palette('Set1')
def my_function(time, rate):
"Thing we want to fit."
return 1. - np.exp(-1*time*rate)
import scipy.optimize
def fit_rate(data):
normalized = data[['CO2']] / highest_co2
x_data = np.array(normalized.index)
y_data = normalized.values[:,0]
try:
popt, pcov = scipy.optimize.curve_fit(my_function, x_data, y_data)
except RuntimeError as e:
print("☝️ couldn't fit: {}".format(e.message))
return np.nan
optimal_parameters = popt
parameter_errors = np.sqrt(np.diag(pcov))
print("Rate: {} +/- {} (1 st. dev.)".format(optimal_parameters[0],parameter_errors[0]))
plt.plot(x_data, y_data, 'o')
plt.plot(x_data, my_function(x_data, *optimal_parameters))
plt.xlabel('Time (s)')
plt.ylabel('Normalized CO$_2$ concentration')
plt.show()
fitted_rate = optimal_parameters[0]
return fitted_rate
rates = []
for experiment in experiments:
print experiment
data = get_data(experiment)
if data is None:
print("☝️No data")
rates.append(np.nan)
continue
rate = fit_rate(data)
rates.append(rate)
rates
rates = np.array(rates)
fixed_rates = rates * (rates>0) + (1e-9 * (rates<0))
log_rates = np.log(fixed_rates)
log_rates
rate_grid = np.reshape(log_rates, (grid_size,grid_size))
ax = sns.heatmap(rate_grid)
extent = carbon_range + oxygen_range
extent
# Because the center of a corner pixel is in fact the corner of the grid
# we want to stretch the image a little
c_step = mesh[0,1,0]-mesh[0,0,0]
o_step = mesh[1,0,1]-mesh[1,0,0]
carbon_range2 = (carbon_range[0]-c_step/2, carbon_range[1]+c_step/2)
oxygen_range2 = (oxygen_range[0]-c_step/2, oxygen_range[1]+c_step/2)
extent2 = carbon_range2 + oxygen_range2
extent2
plt.imshow(rate_grid.T, interpolation='none', origin='lower', extent=extent2, aspect='equal')
plt.plot(-5.997, -4.485, 'ok')
plt.text(-5.997, -4.485, 'Ni(111)')
# Binding energies extracted from:
u"""
Medford, A. J.; Lausche, A. C.; Abild-Pedersen, F.;
Temel, B.; Schjødt, N. C.; Nørskov, J. K.; Studt, F.
Activity and Selectivity Trends in
Synthesis Gas Conversion to Higher Alcohols.
Topics in Catalysis 2014, 57 (1-4), 135–142
DOI: 10.1007/s11244-013-0169-0.
"""
medford_energies = { # Carbon, then Oxygen
'Ru': ( 0.010349288486416697, -2.8153856448231256),
'Rh': ( 0.16558861578266493, -2.546620868091181),
'Ni': ( 0.3001293661060802, -2.5881741535441853),
'Ir': ( 0.36222509702457995, -2.826185484230718),
'Pd': ( 0.28460543337645516, -1.207119596734621),
'Pt': ( 0.8796895213454077, -1.445820136503547),
'Cu': ( 2.323415265200518, -1.7218249542757729),
'Ag': ( 3.855109961190168, -0.8341504215550701),
'Au': ( 3.5601552393272975, -0.10963108355266138),
}
# Shift Medford's energies so that Ni matches Wayne Blaylock's Ni
blaylock_ni = np.array([-5.997, -4.485])
old_ni = np.array(medford_energies['Ni'])
shifted_energies = {metal: tuple(blaylock_ni + np.array(E)-old_ni) for metal,E in medford_energies.items()}
shifted_energies
plt.imshow(rate_grid.T, interpolation='none', origin='lower', extent=extent2, aspect='equal')
for metal, coords in shifted_energies.iteritems():
plt.plot(coords[0], coords[1], 'ok')
plt.text(coords[0], coords[1], metal)
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
# Binding energies for close packed surfaces from
"""
Abild-Pedersen, F.; Greeley, J.; Studt, F.; Rossmeisl, J.;
Munter, T. R.; Moses, P. G.; Skúlason, E.; Bligaard, T.; Norskov, J. K.
Scaling Properties of Adsorption Energies for Hydrogen-Containing
Molecules on Transition-Metal Surfaces.
Phys. Rev. Lett. 2007, 99 (1), 016105
DOI: 10.1103/PhysRevLett.99.016105.
"""
abildpedersen_energies = { # Carbon, then Oxygen
'Ru': ( -6.397727272727272, -5.104763568600047),
'Rh': ( -6.5681818181818175, -4.609771721406942),
'Ni': ( -6.045454545454545, -4.711681807593758),
'Ir': ( -6.613636363636363, -5.94916142557652),
'Pd': ( -6, -3.517877940833916),
'Pt': ( -6.363636363636363, -3.481481481481482),
'Cu': ( -4.159090909090907, -3.85272536687631),
'Ag': ( -2.9545454545454533, -2.9282552993244817),
'Au': ( -3.7499999999999973, -2.302236198462614),
}
abildpedersen_energies['Pt'][0] - abildpedersen_energies['Ni'][0]
plt.imshow(rate_grid.T, interpolation='none', origin='lower', extent=extent2, aspect='equal')
for metal, coords in abildpedersen_energies.iteritems():
color = {'Ag':'w','Au':'w','Cu':'w'}.get(metal,'k')
plt.plot(coords[0], coords[1], 'o'+color)
plt.text(coords[0], coords[1], metal, color=color)
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
plt.xlabel('$\Delta E^C$ (eV)')
plt.ylabel('$\Delta E^O$ (eV)')
For this, we plot the CO2 concentration profiles on a log-log plot, and see the characteristic time as the time at which it crosses the diagonal, i.e. when does $\left( \log_{10}(time) + \log_{10}(\frac{CO_2}{CO_2max}) \right) \ge -10$
"""
Plot everything on a log-log plot
"""
import seaborn as sns
plt.figure(figsize=(5, 4))
num_lines = len(experiments)
sns.set_palette(sns.color_palette("coolwarm",num_lines))
ax = plt.axes()
def make_label(experiment):
return "{:+.1f}, {:+.1f}".format(*experiment)
for experiment in experiments:
print experiment
data = get_data(experiment)
if data is None:
print("☝️No data")
continue
times = np.array(data.index)
normalized = data[['CO2']].values[:,0] / highest_co2
ax.loglog(times, normalized, label=make_label(experiment))
try:
i = (np.nonzero((np.log10(times)+np.log10(normalized)) > -10))[0][0]
except IndexError:
i = -1
print i, times[i], normalized[i]
ax.text(times[i], normalized[i], make_label(experiment), rotation=45, ha='center', va='center', fontsize=12)
plt.plot([1e-10, 1],[1,1e-10], 'g:')
plt.ylim(1e-10, 1)
plt.xlim(1e-10, 1)
#plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel('Time (s)')
plt.ylabel('Normalized CO2 concentration')
"""
An alternative way of defining rates.
"""
new_rates = []
for experiment in experiments:
#print experiment
data = get_data(experiment)
if data is None:
print("No data for experiment {}".format(experiment))
new_rates.append(np.nan)
continue
times = np.array(data.index)
normalized = data[['CO2']].values[:,0] / highest_co2
try:
i = (np.nonzero((np.log10(times)+np.log10(normalized)) > -10))[0][0]
time = times[i]
except IndexError:
time = 1.0
new_rates.append(1./time)
new_log_rates = np.log(np.array(new_rates))
print new_log_rates
new_rate_grid = np.reshape(new_log_rates, (grid_size,grid_size))
plt.imshow(new_rate_grid.T, interpolation='none', origin='lower', extent=extent2, aspect='equal')
for metal, coords in abildpedersen_energies.iteritems():
color = {'Ag':'w','Au':'w',}.get(metal,'k')
plt.plot(coords[0], coords[1], 'o'+color)
plt.text(coords[0], coords[1], metal, color=color)
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
plt.xlabel('$\Delta E^C$ (eV)')
plt.ylabel('$\Delta E^O$ (eV)')
new_rate_grid = np.reshape(new_log_rates, (grid_size,grid_size))
plt.imshow(new_rate_grid.T, interpolation='spline16', origin='lower', extent=extent2, aspect='equal')
for metal, coords in abildpedersen_energies.iteritems():
color = {'Ag':'w','Au':'w',}.get(metal,'k')
plt.plot(coords[0], coords[1], 'o'+color)
plt.text(coords[0], coords[1], metal, color=color)
plt.xlim(carbon_range)
plt.ylim(oxygen_range)
plt.xlabel('$\Delta E^C$ (eV)')
plt.ylabel('$\Delta E^O$ (eV)')
def get_reaction_count(experiment):
d = directory(*experiment)
f = os.path.join(d,'chemkin','chem_annotated-surface.inp')
with open(f) as chemkin:
r = re.compile('\! Reaction index: Chemkin \#(\d+)')
for line in chemkin:
m = r.match(line)
if m:
count = int(m.group(1))
return count
get_reaction_count(experiment)
i=35
print experiments[i]
print get_reaction_count(experiments[i])
reaction_counts = map(get_reaction_count, experiments)
reaction_counts_grid = np.log10(np.reshape(reaction_counts, (grid_size,grid_size)))
plt.imshow(reaction_counts_grid.T, interpolation='none', origin='lower', extent=extent2, aspect='equal')
for metal, coords in abildpedersen_energies.iteritems():
color = {'Ag':'w','Au':'w','Cu':'w'}.get(metal,'k')
color='k'
plt.plot(coords[0], coords[1], 'o'+color)
plt.text(coords[0], coords[1], metal, color=color)
plt.xlim(carbon_range2)
plt.ylim(oxygen_range2)
plt.xlabel('$\Delta E^C$ (eV)')
plt.ylabel('$\Delta E^O$ (eV)')
for e,n in zip(experiments,reaction_counts):
plt.text(e[0],e[1],n,color='w',ha='center', va='center')
#plt.colorbar()
# A linear one, just to check it looks the same
reaction_counts_grid = np.reshape(reaction_counts, (grid_size,grid_size))
ax = sns.heatmap(reaction_counts_grid.T[::-1,:], annot=True, fmt='d', square=True)